/**
 * Create an ORC file reader.
 * @param fs file system
 * @param path file name to read from
 * @return a new ORC file reader.
 * @throws IOException
 */
public static Reader createReader(FileSystem fs, Path path) throws IOException {
  ReaderOptions opts = new ReaderOptions(new Configuration());
  opts.filesystem(fs);
  return new ReaderImpl(path, opts);
}
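A minimal usage sketch for the convenience overload above; the configuration,
filesystem, and file path here are illustrative assumptions, not taken from the source:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path path = new Path("/tmp/example.orc"); // hypothetical file
Reader reader = OrcFile.createReader(fs, path); // delegates to ReaderImpl via ReaderOptions
System.out.println("rows: " + reader.getNumberOfRows());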
    OrcFile.readerOptions(conf).maxLength(length));
AcidStats acidStats = OrcAcidUtils.parseAcidStats(deleteDeltaReader);
if (acidStats.deletes == 0) {
if (length != -1 && fs.exists(deltaFile)) {
  Reader deltaReader = OrcFile.createReader(deltaFile,
      OrcFile.readerOptions(conf).maxLength(length));
  ReaderPair deltaPair = new ReaderPair(key, deltaReader, bucket,
      minKey, maxKey, eventOptions);
private void populateAndCacheStripeDetails() throws IOException {
  Reader orcReader = OrcFile.createReader(file.getPath(),
      OrcFile.readerOptions(context.conf).filesystem(fs));
  List<String> projCols = ColumnProjectionUtils.getReadColumnNames(context.conf);
  projColsUncompressedSize = orcReader.getRawDataSizeOfColumns(projCols);
private static void assertFileContentsOrcHive(
    Type type,
    TempFile tempFile,
    Iterable<?> expectedValues)
    throws Exception {
  JobConf configuration = new JobConf(new Configuration(false));
  configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
  configuration.setBoolean(READ_ALL_COLUMNS, false);

  Reader reader = OrcFile.createReader(
      new Path(tempFile.getFile().getAbsolutePath()),
      new ReaderOptions(configuration));
  org.apache.hadoop.hive.ql.io.orc.RecordReader recordReader = reader.rows();

  StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
  StructField field = rowInspector.getStructFieldRef("test");

  Iterator<?> iterator = expectedValues.iterator();
  Object rowData = null;
  while (recordReader.hasNext()) {
    rowData = recordReader.next(rowData);
    Object expectedValue = iterator.next();
    Object actualValue = rowInspector.getStructFieldData(rowData, field);
    actualValue = decodeRecordReaderValue(type, actualValue);
    assertColumnValueEquals(type, actualValue, expectedValue);
  }
  assertFalse(iterator.hasNext());
}
/**
 * Ensures orcReader is initialized for the split.
 */
private void ensureOrcReader() throws IOException {
  if (orcReader != null) return;
  path = split.getPath();
  if (fileKey instanceof Long && HiveConf.getBoolVar(
      daemonConf, ConfVars.LLAP_IO_USE_FILEID_PATH)) {
    path = HdfsUtils.getFileIdPath(fs, path, (long) fileKey);
  }
  LlapIoImpl.ORC_LOGGER.trace("Creating reader for {} ({})", path, split.getPath());
  long startTime = counters.startTimeCounter();
  ReaderOptions opts = OrcFile.readerOptions(jobConf).filesystem(fs).fileMetadata(fileMetadata);
  if (split instanceof OrcSplit) {
    OrcTail orcTail = ((OrcSplit) split).getOrcTail();
    if (orcTail != null) {
      LlapIoImpl.ORC_LOGGER.debug("Setting OrcTail. path={}", path);
      opts.orcTail(orcTail);
    }
  }
  orcReader = EncodedOrcFile.createReader(path, opts);
  counters.incrTimeCounter(LlapIOCounters.HDFS_TIME_NS, startTime);
}
@Override
public boolean validateInput(FileSystem fs, HiveConf conf,
    ArrayList<FileStatus> files) throws IOException {
  if (Utilities.isVectorMode(conf)) {
    return new VectorizedOrcInputFormat().validateInput(fs, conf, files);
  }

  if (files.size() <= 0) {
    return false;
  }
  for (FileStatus file : files) {
    try {
      OrcFile.createReader(file.getPath(),
          OrcFile.readerOptions(conf).filesystem(fs));
    } catch (IOException e) {
      return false;
    }
  }
  return true;
}
@Override
public boolean validateInput(FileSystem fs, HiveConf conf,
    ArrayList<FileStatus> files) throws IOException {
  if (files.size() <= 0) {
    return false;
  }
  for (FileStatus file : files) {
    try {
      OrcFile.createReader(file.getPath(),
          OrcFile.readerOptions(conf).filesystem(fs));
    } catch (IOException e) {
      return false;
    }
  }
  return true;
}
@Override
public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(
    InputSplit inputSplit, JobConf conf, Reporter reporter) throws IOException {
  FileSplit fSplit = (FileSplit) inputSplit;
  reporter.setStatus(fSplit.toString());

  Path path = fSplit.getPath();

  OrcFile.ReaderOptions opts = OrcFile.readerOptions(conf);
  if (fSplit instanceof OrcSplit) {
    OrcSplit orcSplit = (OrcSplit) fSplit;
    if (orcSplit.hasFooter()) {
      opts.fileMetaInfo(orcSplit.getFileMetaInfo());
    }
  }
  Reader reader = OrcFile.createReader(path, opts);
  return new VectorizedOrcRecordReader(reader, conf, fSplit);
}
public static ReaderOptions readerOptions(Configuration conf) {
  return new ReaderOptions(conf);
}
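The factory above pairs with createReader(Path, ReaderOptions). A hedged sketch of
the call chain used throughout these snippets; conf, fs, and orcPath are assumptions:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
Path orcPath = new Path("/tmp/data.orc"); // hypothetical path
Reader reader = OrcFile.createReader(orcPath,
    OrcFile.readerOptions(conf).filesystem(fs)); // passing fs explicitly skips a lookup from conf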
public OrcFileStripeMergeRecordReader(Configuration conf, FileSplit split)
    throws IOException {
  path = split.getPath();
  start = split.getStart();
  end = start + split.getLength();
  FileSystem fs = path.getFileSystem(conf);
  this.reader = OrcFile.createReader(path,
      OrcFile.readerOptions(conf).filesystem(fs));
  this.iter = reader.getStripes().iterator();
  this.stripeIdx = 0;
  this.stripeStatistics = ((ReaderImpl) reader).getOrcProtoStripeStatistics();
}
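A short sketch of what the stripe iterator set up above exposes. StripeInformation
is part of the public Reader API; the printing is illustrative only:

for (StripeInformation stripe : reader.getStripes()) {
  // each stripe reports its file offset, byte length, and row count
  System.out.println("stripe @" + stripe.getOffset()
      + " bytes=" + stripe.getLength()
      + " rows=" + stripe.getNumberOfRows());
}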
Reader deltaReader = OrcFile.createReader(deltaFile,
    OrcFile.readerOptions(conf).maxLength(length));
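Where the length passed to maxLength comes from is not shown here; a hedged guess,
based on the nearby ACID snippets, is the delta's side file read via OrcAcidUtils
(an assumption; only parseAcidStats appears in these snippets):

// Assumed helper: reads the last committed length from the delta's
// _flush_length side file; the callers above guard on length != -1.
long length = OrcAcidUtils.getLastFlushLength(fs, deltaFile);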
Path oldFilePath =
    new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc"));
Reader reader = OrcFile.createReader(oldFilePath,
    OrcFile.readerOptions(conf).filesystem(fs));
writer.close();
Reader reader = OrcFile.createReader(testFilePath,
    OrcFile.readerOptions(conf).filesystem(fs));
if (length != -1 && fs.exists(deltaFile)) {
  Reader deltaReader = OrcFile.createReader(deltaFile,
      OrcFile.readerOptions(conf).maxLength(length));
  Reader.Options deltaEventOptions = null;
  if (eventOptions.getSearchArgument() != null) {