        configuration, file, cacheKey, range(split.getStart(), split.getEnd()), cacheTag);
    MessageType fileSchema = footer.getFileMetaData().getSchema();
    FilterCompat.Filter filter = getFilter(configuration);
    blocks = filterRowGroups(filter, footer.getBlocks(), fileSchema);
  } else {
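The fragment above prunes row groups using whatever filter was stored in the job configuration. A minimal, self-contained sketch of the same pattern outside a reader class, assuming a Parquet file path is passed on the command line (ParquetFileReader.readFooter is deprecated in recent parquet-mr releases but still available):

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.compat.RowGroupFilter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.ParquetInputFormat;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.schema.MessageType;

public class RowGroupFilterSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path file = new Path(args[0]); // path to an existing Parquet file

    // Read the footer to get the row-group metadata and the file schema.
    ParquetMetadata footer = ParquetFileReader.readFooter(conf, file);
    MessageType fileSchema = footer.getFileMetaData().getSchema();

    // Retrieve whatever filter was configured for the job (NOOP if none was set).
    FilterCompat.Filter filter = ParquetInputFormat.getFilter(conf);

    // Drop row groups whose statistics prove that no record can match.
    List<BlockMetaData> blocks =
        RowGroupFilter.filterRowGroups(filter, footer.getBlocks(), fileSchema);
    System.out.println("row groups kept: " + blocks.size());
  }
}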
/**
 * {@inheritDoc}
 */
@Override
public RecordReader<Void, T> createRecordReader(
    InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  Configuration conf = ContextUtil.getConfiguration(taskAttemptContext);
  ReadSupport<T> readSupport = getReadSupport(conf);
  return new ParquetRecordReader<T>(readSupport, getFilter(conf));
}
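In all of these call sites, getFilter(conf) only retrieves a filter that the job driver stored earlier. A minimal sketch of the producing side using the FilterApi predicate builder; the column name "id" and the value 7 are illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.hadoop.ParquetInputFormat;

public class FilterConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Hypothetical predicate: keep only rows whose "id" column equals 7.
    FilterPredicate predicate = FilterApi.eq(FilterApi.intColumn("id"), 7);

    // Store the predicate in the job configuration; the snippets above
    // read it back with ParquetInputFormat.getFilter(conf).
    ParquetInputFormat.setFilterPredicate(conf, predicate);

    FilterCompat.Filter filter = ParquetInputFormat.getFilter(conf);
    System.out.println(filter); // wraps the predicate, or NOOP if none was set
  }
}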
public Builder(Configuration conf) {
  this.conf = conf;
  useSignedStringMinMax(conf.getBoolean("parquet.strings.signed-min-max.enabled", false));
  useDictionaryFilter(conf.getBoolean(DICTIONARY_FILTERING_ENABLED, true));
  useStatsFilter(conf.getBoolean(STATS_FILTERING_ENABLED, true));
  useRecordFilter(conf.getBoolean(RECORD_FILTERING_ENABLED, true));
  useColumnIndexFilter(conf.getBoolean(COLUMN_INDEX_FILTERING_ENABLED, true));
  withCodecFactory(HadoopCodecs.newFactory(conf, 0));
  withRecordFilter(getFilter(conf));
  withMaxAllocationInBytes(conf.getInt(ALLOCATION_SIZE, 8388608));
  String badRecordThresh = conf.get(BAD_RECORD_THRESHOLD_CONF_KEY);
  if (badRecordThresh != null) {
    set(BAD_RECORD_THRESHOLD_CONF_KEY, badRecordThresh);
  }
}
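The Builder above reads its defaults from the Hadoop Configuration, so each filter stage can be switched off per job. A small sketch, assuming the string constants are the public ones exposed by ParquetInputFormat (column-index filtering requires a parquet-mr release that writes column indexes); every flag defaults to true, and the false values below are purely illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.ParquetInputFormat;

public class FilterToggleSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Disable two of the filter stages for this job; the rest keep their defaults.
    conf.setBoolean(ParquetInputFormat.DICTIONARY_FILTERING_ENABLED, false);
    conf.setBoolean(ParquetInputFormat.COLUMN_INDEX_FILTERING_ENABLED, false);
    conf.setBoolean(ParquetInputFormat.STATS_FILTERING_ENABLED, true);
    conf.setBoolean(ParquetInputFormat.RECORD_FILTERING_ENABLED, true);
  }
}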
    throws IOException {
  List<ParquetInputSplit> splits = new ArrayList<ParquetInputSplit>();
  Filter filter = ParquetInputFormat.getFilter(configuration);
@Override
public IPentahoRecordReader createRecordReader( IPentahoInputSplit split ) throws Exception {
  return inClassloader( () -> {
    PentahoInputSplitImpl pentahoInputSplit = (PentahoInputSplitImpl) split;
    InputSplit inputSplit = pentahoInputSplit.getInputSplit();
    ReadSupport<RowMetaAndData> readSupport = new PentahoParquetReadSupport();
    ParquetRecordReader<RowMetaAndData> nativeRecordReader =
        new ParquetRecordReader<RowMetaAndData>( readSupport, ParquetInputFormat.getFilter( job.getConfiguration() ) );
    TaskAttemptContextImpl task = new TaskAttemptContextImpl( job.getConfiguration(), new TaskAttemptID() );
    nativeRecordReader.initialize( inputSplit, task );
    return new PentahoParquetRecordReader( nativeRecordReader );
  } );
}
public RecordReaderWrapper(
    InputSplit oldSplit, JobConf oldJobConf, Reporter reporter) throws IOException {
  splitLen = oldSplit.getLength();

  try {
    realReader = new ParquetRecordReader<V>(
        ParquetInputFormat.<V>getReadSupportInstance(oldJobConf),
        ParquetInputFormat.getFilter(oldJobConf));

    if (oldSplit instanceof ParquetInputSplitWrapper) {
      realReader.initialize(((ParquetInputSplitWrapper) oldSplit).realSplit, oldJobConf, reporter);
    } else if (oldSplit instanceof FileSplit) {
      realReader.initialize((FileSplit) oldSplit, oldJobConf, reporter);
    } else {
      throw new IllegalArgumentException(
          "Invalid split (not a FileSplit or ParquetInputSplitWrapper): " + oldSplit);
    }

    // read once to gain access to key and value objects
    if (realReader.nextKeyValue()) {
      firstRecord = true;
      valueContainer = new Container<V>();
      valueContainer.set(realReader.getCurrentValue());
    } else {
      eof = true;
    }
  } catch (InterruptedException e) {
    Thread.interrupted();
    throw new IOException(e);
  }
}
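RecordReaderWrapper is the bridge used by the old mapred-API input format; the filter still reaches it through ParquetInputFormat.getFilter on the JobConf. A rough sketch of the driver-side wiring, assuming the example GroupReadSupport and a hypothetical input path:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.parquet.hadoop.ParquetInputFormat;
import org.apache.parquet.hadoop.example.GroupReadSupport;
import org.apache.parquet.hadoop.mapred.DeprecatedParquetInputFormat;

public class MapredWiringSketch {
  public static void main(String[] args) throws Exception {
    JobConf jobConf = new JobConf();

    // The wrapper above delegates to ParquetRecordReader and picks up the
    // read support and filter from this JobConf.
    jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
    ParquetInputFormat.setReadSupportClass(jobConf, GroupReadSupport.class);

    // Hypothetical input location.
    FileInputFormat.addInputPath(jobConf, new Path("/tmp/example.parquet"));
  }
}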