bucket, validWriteIdList, new Reader.Options(conf), deltaDirectory, mergerOptions);
/**
 * Builds a raw record reader over an ACID directory layout: an optional base
 * directory plus a set of delta directories, merged by {@link OrcRawRecordMerger}.
 *
 * @param conf           job configuration used for file-system access and reader options
 * @param collapseEvents whether the merger should collapse multiple events per row
 * @param bucket         the bucket number whose file(s) should be read
 * @param validTxnList   transactions considered valid for this read
 * @param baseDirectory  base directory, or null when there is no base
 * @param deltaDirectory delta directories to merge on top of the base
 * @throws IOException if the bucket file cannot be located or opened
 */
@Override public RawReader<OrcStruct> getRawReader(Configuration conf,
                                                   boolean collapseEvents,
                                                   int bucket,
                                                   ValidTxnList validTxnList,
                                                   Path baseDirectory,
                                                   Path[] deltaDirectory
                                                   ) throws IOException {
  Reader reader = null;
  boolean isOriginal = false;
  if (baseDirectory != null) {
    Path bucketFile;
    if (baseDirectory.getName().startsWith(AcidUtils.BASE_PREFIX)) {
      // Compacted base: bucket files follow the standard ACID naming scheme.
      bucketFile = AcidUtils.createBucketFile(baseDirectory, bucket);
    } else {
      // Directory is not a base_N dir, so it holds pre-ACID ("original") files;
      // the matching bucket file must be discovered by listing.
      isOriginal = true;
      bucketFile = findOriginalBucket(baseDirectory.getFileSystem(conf),
          baseDirectory, bucket);
    }
    reader = OrcFile.createReader(bucketFile, OrcFile.readerOptions(conf));
  }
  // reader stays null when there is no base; the merger then reads deltas only.
  return new OrcRawRecordMerger(conf, collapseEvents, reader, isOriginal,
      bucket, validTxnList, new Reader.Options(), deltaDirectory);
}
Reader.Options options = new Reader.Options(conf).schema(schema);
fs.create(root); ReaderPair pair = new OrcRawRecordMerger.OriginalReaderPairToRead(key, reader, BUCKET, null, null, new Reader.Options(), new OrcRawRecordMerger.Options().rootPath(root), conf, new ValidReaderWriteIdList(), 0); assertEquals("first", value(pair.nextRecord())); assertEquals(0, key.getWriteId());
OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, createMaximalTxnList(), new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()), new OrcRawRecordMerger.Options().isCompacting(false)); RecordIdentifier key = merger.createKey();
new Reader.Options(), new HiveConf()); RecordReader recordReader = pair.getRecordReader(); assertEquals(10, key.getWriteId());
/**
 * Creates a {@link RecordReader} over a byte range of a plain (non-ACID) ORC
 * file, applying schema-on-read, column projection, and any configured
 * search argument.
 *
 * @param file   the already-opened ORC reader
 * @param conf   configuration supplying schema-on-read, included columns and SARG
 * @param offset start of the byte range to read
 * @param length length of the byte range to read
 * @throws IOException if the underlying row reader cannot be created, or (via
 *         the raise helper) if this is a transactional-table scan, which must
 *         not use this non-ACID path
 */
public static RecordReader createReaderFromFile(Reader file,
                                                Configuration conf,
                                                long offset, long length
                                                ) throws IOException {
  boolean isTransactionalTableScan =
      HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
  if (isTransactionalTableScan) {
    // ACID tables carry event metadata that this plain reader cannot interpret.
    raiseAcidTablesMustBeReadWithAcidReaderException(conf);
  }
  /**
   * Do we have schema on read in the configuration variables?
   */
  TypeDescription schema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE);
  Reader.Options options = new Reader.Options().range(offset, length);
  options.schema(schema);
  boolean isOriginal = isOriginal(file);
  // No schema-on-read configured: fall back to the file's own schema for
  // computing types and included columns.
  if (schema == null) {
    schema = file.getSchema();
  }
  List<OrcProto.Type> types = OrcUtils.getOrcTypes(schema);
  options.include(genIncludedColumns(schema, conf));
  setSearchArgument(options, types, conf, isOriginal);
  return file.rowsOptions(options);
}
/**
 * Verifies that ReaderPairAcid iterates the mocked ACID events bounded by a
 * min and max RecordIdentifier, publishes each event's ids through the shared
 * ReaderKey, and closes the underlying record reader when exhausted.
 */
@Test public void testReaderPair() throws Exception {
  ReaderKey key = new ReaderKey();
  Reader reader = createMockReader();
  RecordIdentifier minKey = new RecordIdentifier(10, 20, 30);
  RecordIdentifier maxKey = new RecordIdentifier(40, 50, 60);
  ReaderPair pair = new OrcRawRecordMerger.ReaderPairAcid(key, reader,
      minKey, maxKey, new Reader.Options(), new HiveConf());
  RecordReader recordReader = pair.getRecordReader();
  // First in-range event from the mock ("third"); its ids land in 'key'.
  // NOTE(review): first key (10,20,40) is past minKey (10,20,30), which looks
  // like an exclusive lower bound — confirm against ReaderPairAcid.
  assertEquals(10, key.getWriteId());
  assertEquals(20, key.getBucketProperty());
  assertEquals(40, key.getRowId());
  assertEquals(120, key.getCurrentWriteId());
  assertEquals("third", value(pair.nextRecord()));
  pair.next(pair.nextRecord());
  // Second (and last) in-range event ("fourth").
  assertEquals(40, key.getWriteId());
  assertEquals(50, key.getBucketProperty());
  assertEquals(60, key.getRowId());
  assertEquals(130, key.getCurrentWriteId());
  assertEquals("fourth", value(pair.nextRecord()));
  pair.next(pair.nextRecord());
  // Iteration ends past maxKey; the underlying reader must be closed.
  assertEquals(null, pair.nextRecord());
  Mockito.verify(recordReader).close();
}
/**
 * Creates a {@link RecordReader} over a byte range of a plain (non-ACID) ORC
 * file, applying schema-on-read, column projection, and any configured
 * search argument. The configuration is threaded into both the reader options
 * and the row reader itself.
 *
 * @param file   the already-opened ORC reader
 * @param conf   configuration supplying schema-on-read, included columns and SARG
 * @param offset start of the byte range to read
 * @param length length of the byte range to read
 * @throws IOException if the underlying row reader cannot be created, or (via
 *         the raise helper) if this is a full-ACID scan, which must not use
 *         this non-ACID path
 */
public static RecordReader createReaderFromFile(Reader file,
                                                Configuration conf,
                                                long offset, long length
                                                ) throws IOException {
  if (AcidUtils.isFullAcidScan(conf)) {
    // Full-ACID tables carry event metadata this plain reader cannot interpret.
    raiseAcidTablesMustBeReadWithAcidReaderException(conf);
  }
  /**
   * Do we have schema on read in the configuration variables?
   */
  TypeDescription schema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE);
  Reader.Options options = new Reader.Options(conf).range(offset, length);
  options.schema(schema);
  boolean isOriginal = isOriginal(file);
  // No schema-on-read configured: fall back to the file's own schema for
  // computing types and included columns.
  if (schema == null) {
    schema = file.getSchema();
  }
  List<OrcProto.Type> types = OrcUtils.getOrcTypes(schema);
  options.include(genIncludedColumns(schema, conf));
  setSearchArgument(options, types, conf, isOriginal);
  return file.rowsOptions(options, conf);
}
/**
 * Reads all rows with the given column projection.
 * Convenience overload delegating to {@link #rowsOptions}.
 */
@Override
public RecordReader rows(boolean[] include) throws IOException {
  // Only column projection is set; everything else stays at defaults.
  Options projected = new Options().include(include);
  return rowsOptions(projected);
}
static Reader.Options createOptionsForReader(Configuration conf) { /** * Do we have schema on read in the configuration variables? */ TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE); Reader.Options readerOptions = new Reader.Options().schema(schema); // TODO: Convert genIncludedColumns and setSearchArgument to use TypeDescription. final List<OrcProto.Type> schemaTypes = OrcUtils.getOrcTypes(schema); readerOptions.include(OrcInputFormat.genIncludedColumns(schema, conf)); OrcInputFormat.setSearchArgument(readerOptions, schemaTypes, conf, true); return readerOptions; }
/**
 * Reads all rows with default options.
 * Convenience overload delegating to {@link #rowsOptions}.
 */
@Override
public RecordReader rows() throws IOException {
  Options defaults = new Options();
  return rowsOptions(defaults);
}
/**
 * Reads rows in the given byte range with the given column projection.
 * Convenience overload delegating to {@link #rowsOptions}.
 */
@Override
public RecordReader rows(long offset, long length, boolean[] include
                         ) throws IOException {
  Options ranged = new Options()
      .include(include)
      .range(offset, length);
  return rowsOptions(ranged);
}
/**
 * Reads rows in the given byte range with column projection and a search
 * argument for predicate pushdown.
 * Convenience overload delegating to {@link #rowsOptions}.
 */
@Override
public RecordReader rows(long offset, long length, boolean[] include,
                         SearchArgument sarg, String[] columnNames
                         ) throws IOException {
  Options filtered = new Options()
      .include(include)
      .range(offset, length)
      .searchArgument(sarg, columnNames);
  return rowsOptions(filtered);
}
static Reader.Options createOptionsForReader(Configuration conf) { /** * Do we have schema on read in the configuration variables? */ TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE); Reader.Options readerOptions = new Reader.Options(conf).schema(schema); // TODO: Convert genIncludedColumns and setSearchArgument to use TypeDescription. final List<OrcProto.Type> schemaTypes = OrcUtils.getOrcTypes(schema); readerOptions.include(OrcInputFormat.genIncludedColumns(schema, conf)); //todo: last param is bogus. why is this hardcoded? OrcInputFormat.setSearchArgument(readerOptions, schemaTypes, conf, true); return readerOptions; }
/**
 * Reads all rows, projecting only the flagged columns.
 * Thin adapter onto the options-based {@link #rowsOptions} API.
 */
@Override
public RecordReader rows(boolean[] include) throws IOException {
  Options columnSelection = new Options().include(include);
  return rowsOptions(columnSelection);
}
/**
 * Reads all rows with no projection, range, or predicate.
 * Thin adapter onto the options-based {@link #rowsOptions} API.
 */
@Override
public RecordReader rows() throws IOException {
  Options allDefaults = new Options();
  return rowsOptions(allDefaults);
}
/**
 * Reads rows in a byte range with projection and a pushdown predicate.
 * Thin adapter onto the options-based {@link #rowsOptions} API.
 */
@Override
public RecordReader rows(long offset, long length, boolean[] include,
                         SearchArgument sarg, String[] columnNames
                         ) throws IOException {
  Options predicated = new Options();
  predicated.include(include);
  predicated.range(offset, length);
  predicated.searchArgument(sarg, columnNames);
  return rowsOptions(predicated);
}
/**
 * Reads rows in a byte range, projecting only the flagged columns.
 * Thin adapter onto the options-based {@link #rowsOptions} API.
 */
@Override
public RecordReader rows(long offset, long length, boolean[] include
                         ) throws IOException {
  Options windowed = new Options();
  windowed.include(include);
  windowed.range(offset, length);
  return rowsOptions(windowed);
}
@Override public SchemaEvolution createSchemaEvolution(TypeDescription fileSchema) { if (readerSchema == null) { readerSchema = fileSchema; } // TODO: will this work correctly with ACID? boolean[] readerIncludes = OrcInputFormat.genIncludedColumns( readerSchema, readerLogicalColumnIds); Reader.Options options = new Reader.Options(jobConf) .include(readerIncludes).includeAcidColumns(includeAcidColumns); return new SchemaEvolution(fileSchema, readerSchema, options); }