@Override
public List<OrcSplit> getSplits() throws IOException {
  List<OrcSplit> splits = Lists.newArrayList();
  // When split-update is enabled, we do not need to account for buckets that aren't covered.
  // This is a huge performance benefit of split-update. The reason we are able to do so is
  // that the 'deltas' here are actually only the delete_deltas. All the insert_deltas with
  // valid user payload data have already been considered as base for the covered buckets.
  // Hence, the uncovered buckets do not have any relevant data and we can just ignore them.
  if (acidOperationalProperties != null && acidOperationalProperties.isSplitUpdate()) {
    return splits; // return an empty list.
  }
  // Generate a split for any buckets that weren't covered.
  // This happens in the case where a bucket just has deltas and no base.
  if (!deltas.isEmpty()) {
    for (int b = 0; b < numBuckets; ++b) {
      if (!covered[b]) {
        splits.add(new OrcSplit(dir, null, b, 0, new String[0], null, false, false,
            deltas, -1, -1));
      }
    }
  }
  return splits;
}
@Override
public List<OrcSplit> getSplits() throws IOException {
  List<OrcSplit> splits = Lists.newArrayList();
  for (HdfsFileStatusWithId file : fileStatuses) {
    FileStatus fileStatus = file.getFileStatus();
    if (fileStatus.getLen() != 0) {
      Object fileKey = file.getFileId();
      if (fileKey == null && allowSyntheticFileIds) {
        fileKey = new SyntheticFileId(fileStatus);
      }
      TreeMap<Long, BlockLocation> blockOffsets = SHIMS.getLocationsWithOffset(fs, fileStatus);
      for (Map.Entry<Long, BlockLocation> entry : blockOffsets.entrySet()) {
        OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), fileKey, entry.getKey(),
            entry.getValue().getLength(), entry.getValue().getHosts(), null, isOriginal, true,
            deltas, -1, fileStatus.getLen());
        splits.add(orcSplit);
      }
    }
  }
  // add uncovered ACID delta splits
  splits.addAll(super.getSplits());
  return splits;
}
fileKey = new SyntheticFileId(file);
return new OrcSplit(file.getPath(), fileKey, offset, length, hosts, orcTail,
    isOriginal, hasBase, deltas, scaledProjSize, fileLen, rootDir, offsetAndBucket);
fileKey = new SyntheticFileId(file);
return new OrcSplit(file.getPath(), fileKey, offset, length, hosts, orcTail,
    isOriginal, hasBase, deltas, scaledProjSize, fileLen);
OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), fileKey, entry.getKey(),
    entry.getValue().getLength(), entry.getValue().getHosts(), null, isOriginal, true,
    deltas, -1, logicalLen, dir, offsetAndBucket);
OrcSplit split = new OrcSplit(originalFilePath, null, stripe.getOffset() + 50, stripe.getLength() - 100,
split = new OrcSplit(originalFilePath, null, stripe.getOffset() + 50, stripe.getLength() - 100,
split = new OrcSplit(originalFilePath, null, stripe.getOffset(), stripe.getLength() - 50,
split = new OrcSplit(originalFilePath, null, stripe.getOffset(), stripe.getLength() + 50,
split = new OrcSplit(originalFilePath, null, stripe.getOffset() - 50, stripe.getLength() + 50,
split = new OrcSplit(originalFilePath, null, stripes.get(0).getOffset() + 50, reader.getContentLength() - 50,
split = new OrcSplit(originalFilePath, null, stripes.get(0).getOffset(), reader.getContentLength(),
OrcSplit split = new OrcSplit(acidFilePath, null, stripe.getOffset() + 50, stripe.getLength() - 100,
split = new OrcSplit(acidFilePath, null, stripe.getOffset() + 50, stripe.getLength() - 100,
split = new OrcSplit(acidFilePath, null, stripe.getOffset(), stripe.getLength() - 50,
split = new OrcSplit(acidFilePath, null, stripe.getOffset(), stripe.getLength() + 50,
split = new OrcSplit(acidFilePath, null, stripe.getOffset() - 50, stripe.getLength() + 50,
split = new OrcSplit(acidFilePath, null, stripes.get(0).getOffset() + 50, reader.getContentLength() - 50,
split = new OrcSplit(acidFilePath, null, stripes.get(0).getOffset(), reader.getContentLength(),
conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
OrcSplit split = new OrcSplit(testFilePath, null, 0, fileLength, new String[0], null,
    false, true, new ArrayList<AcidInputFormat.DeltaMetaData>(), fileLength, fileLength,
    workDir, null);
conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2,3");
split = new OrcSplit(testFilePath, null, 0, fileLength, new String[0], null,
    false, true, new ArrayList<AcidInputFormat.DeltaMetaData>(), fileLength, fileLength,
    workDir, null);
", offset " + lastStripeOffset + ", length " + lastStripeLength); OrcSplit split = new OrcSplit(testFilePath, null, lastStripeOffset + 1, lastStripeLength, new String[0], null, false, true, new ArrayList<AcidInputFormat.DeltaMetaData>(), fileLength, fileLength, workDir, null);
@Override
public List<OrcSplit> getSplits() throws IOException {
  // Generate a split for any buckets that weren't covered.
  // This happens in the case where a bucket just has deltas and no base.
  List<OrcSplit> splits = Lists.newArrayList();
  if (!deltas.isEmpty()) {
    for (int b = 0; b < numBuckets; ++b) {
      if (!covered[b]) {
        splits.add(new OrcSplit(dir, b, 0, new String[0], null, false, false, deltas, -1));
      }
    }
  }
  return splits;
}
@Override
public List<OrcSplit> getSplits() throws IOException {
  List<OrcSplit> splits = Lists.newArrayList();
  for (FileStatus fileStatus : fileStatuses) {
    String[] hosts = SHIMS.getLocationsWithOffset(fs, fileStatus).firstEntry().getValue()
        .getHosts();
    OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), 0, fileStatus.getLen(), hosts,
        null, isOriginal, true, deltas, -1);
    splits.add(orcSplit);
  }
  // add uncovered ACID delta splits
  splits.addAll(super.getSplits());
  return splits;
}
hostList.toArray(hosts);
return new OrcSplit(file.getPath(), offset, length, hosts, fileMetaInfo,
    isOriginal, hasBase, deltas, projColsUncompressedSize);